{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "import re\n",
    "\n",
    "from pymongo import MongoClient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "from zhtools import langconv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 从mongo数据库中读取数据\n",
    "client = MongoClient()\n",
    "db = client['douban']\n",
    "table = db['world_film']\n",
    "df_all = pd.DataFrame(list(table.find()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(126711, 13)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>actor</th>\n",
       "      <th>date</th>\n",
       "      <th>director</th>\n",
       "      <th>id</th>\n",
       "      <th>language</th>\n",
       "      <th>rate</th>\n",
       "      <th>rating_num</th>\n",
       "      <th>region</th>\n",
       "      <th>runtime</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5c024433f625b7c00bfbe877</td>\n",
       "      <td>[扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...</td>\n",
       "      <td>[2009-04-17(美国)]</td>\n",
       "      <td>[布尔·斯蒂尔斯]</td>\n",
       "      <td>2213591</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.3</td>\n",
       "      <td>155828</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[102 分钟]</td>\n",
       "      <td>重返十七岁 17 Again</td>\n",
       "      <td>[剧情, 喜剧, 爱情]</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        _id  \\\n",
       "0  5c024433f625b7c00bfbe877   \n",
       "\n",
       "                                               actor              date  \\\n",
       "0  [扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...  [2009-04-17(美国)]   \n",
       "\n",
       "    director       id language rate rating_num region   runtime  \\\n",
       "0  [布尔·斯蒂尔斯]  2213591     [英语]  7.3     155828   [美国]  [102 分钟]   \n",
       "\n",
       "            title          type  year  \n",
       "0  重返十七岁 17 Again  [剧情, 喜剧, 爱情]  2009  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据清洗"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 将各列的空列表转为空值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将各列空列表转为None\n",
    "# list如果空列表，if x 时返回的是False\n",
    "for col in df_all.columns:\n",
    "    df_all[col] = df_all[col].map(lambda x:x if x else None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 去掉部分字段为空的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------\n",
      "False    126711\n",
      "Name: _id, dtype: int64\n",
      "----------------\n",
      "False    99136\n",
      "True     27575\n",
      "Name: actor, dtype: int64\n",
      "----------------\n",
      "False    106360\n",
      "True      20351\n",
      "Name: date, dtype: int64\n",
      "----------------\n",
      "False    118695\n",
      "True       8016\n",
      "Name: director, dtype: int64\n",
      "----------------\n",
      "False    126711\n",
      "Name: id, dtype: int64\n",
      "----------------\n",
      "False    121569\n",
      "True       5142\n",
      "Name: language, dtype: int64\n",
      "----------------\n",
      "True     85065\n",
      "False    41646\n",
      "Name: rate, dtype: int64\n",
      "----------------\n",
      "True     85065\n",
      "False    41646\n",
      "Name: rating_num, dtype: int64\n",
      "----------------\n",
      "False    126395\n",
      "True        316\n",
      "Name: region, dtype: int64\n",
      "----------------\n",
      "False    85622\n",
      "True     41089\n",
      "Name: runtime, dtype: int64\n",
      "----------------\n",
      "False    126711\n",
      "Name: title, dtype: int64\n",
      "----------------\n",
      "False    126711\n",
      "Name: type, dtype: int64\n",
      "----------------\n",
      "False    126364\n",
      "True        347\n",
      "Name: year, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 查看各列的空值情况\n",
    "for col in df_all.columns:\n",
    "    print(\"----------------\")\n",
    "    print(df_all[col].isnull().value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "如果以下的数据出现空值，则整行去掉：\n",
    "\n",
    "* 演员 actor\n",
    "* 上映时间 date\n",
    "* 导演 director\n",
    "* 语言 language\n",
    "* 类型 type\n",
    "* 地区 region"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_all1 = df_all.loc[~(df_all['actor'].isnull() | df_all['date'].isnull() | \n",
    "                       df_all['director'].isnull()|df_all['language'].isnull()|\n",
    "                       df_all['type'].isnull() | df_all['region'].isnull() )]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(82564, 13)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all1.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>actor</th>\n",
       "      <th>date</th>\n",
       "      <th>director</th>\n",
       "      <th>id</th>\n",
       "      <th>language</th>\n",
       "      <th>rate</th>\n",
       "      <th>rating_num</th>\n",
       "      <th>region</th>\n",
       "      <th>runtime</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5c024433f625b7c00bfbe877</td>\n",
       "      <td>[扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...</td>\n",
       "      <td>[2009-04-17(美国)]</td>\n",
       "      <td>[布尔·斯蒂尔斯]</td>\n",
       "      <td>2213591</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.3</td>\n",
       "      <td>155828</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[102 分钟]</td>\n",
       "      <td>重返十七岁 17 Again</td>\n",
       "      <td>[剧情, 喜剧, 爱情]</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        _id  \\\n",
       "0  5c024433f625b7c00bfbe877   \n",
       "\n",
       "                                               actor              date  \\\n",
       "0  [扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...  [2009-04-17(美国)]   \n",
       "\n",
       "    director       id language rate rating_num region   runtime  \\\n",
       "0  [布尔·斯蒂尔斯]  2213591     [英语]  7.3     155828   [美国]  [102 分钟]   \n",
       "\n",
       "            title          type  year  \n",
       "0  重返十七岁 17 Again  [剧情, 喜剧, 爱情]  2009  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all1.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 填充上映年份"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "有些电影有上映日期，但年份为空，下面填充这一部分数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>actor</th>\n",
       "      <th>date</th>\n",
       "      <th>director</th>\n",
       "      <th>id</th>\n",
       "      <th>language</th>\n",
       "      <th>rate</th>\n",
       "      <th>rating_num</th>\n",
       "      <th>region</th>\n",
       "      <th>runtime</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1263</th>\n",
       "      <td>5c02453bf625b7c00bfc1f04</td>\n",
       "      <td>[松平哲郎, 品川美月, 松永美規, みぶ真也, 浅尾典彦, 宮瀬れい]</td>\n",
       "      <td>[2010-07-23]</td>\n",
       "      <td>[阿見松ノ介]</td>\n",
       "      <td>5273374</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>心灵音 The Movie 心霊音 THE MOVIE</td>\n",
       "      <td>[恐怖]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1546</th>\n",
       "      <td>5c024543f625b7c00bfc213c</td>\n",
       "      <td>[Christian Campbell, Sarah Chalke, Carly Pope]</td>\n",
       "      <td>[1998-03-22]</td>\n",
       "      <td>[Christopher Leitch]</td>\n",
       "      <td>1298504</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>等着你回来 I've Been Waiting for You (TV)</td>\n",
       "      <td>[恐怖, 惊栗]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4417</th>\n",
       "      <td>5c0246f5f625b7c00bfc676e</td>\n",
       "      <td>[羽田圭子, 羽田圭子, 雨宮夕子]</td>\n",
       "      <td>[1997-02-28(日本)]</td>\n",
       "      <td>[伊藤正治]</td>\n",
       "      <td>26947984</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>生保の女 それぞれの事情</td>\n",
       "      <td>[剧情, 情色]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7490</th>\n",
       "      <td>5c024aa2f625b7c00bfcb5a3</td>\n",
       "      <td>[欧阳震华, 李铭顺, 张世, 徐峥, 寇振海, 保剑锋, 宁静, 范文芳, 牛萌萌, 唐一...</td>\n",
       "      <td>[2005]</td>\n",
       "      <td>[刘志]</td>\n",
       "      <td>1830531</td>\n",
       "      <td>[普通话]</td>\n",
       "      <td>6.4</td>\n",
       "      <td>724</td>\n",
       "      <td>[中国大陆]</td>\n",
       "      <td>None</td>\n",
       "      <td>福禄寿三星报喜</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8467</th>\n",
       "      <td>5c024b52f625b7c00bfcdb69</td>\n",
       "      <td>[笠原弘子, 坂本千夏, 荒川太郎, 渡辺真砂子, 椎名碧流, 勝田治美]</td>\n",
       "      <td>[1993-08-14]</td>\n",
       "      <td>[难波日登志]</td>\n",
       "      <td>5349437</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>3丁目のタマ おねがい!モモちゃんを捜して!!</td>\n",
       "      <td>[动画, 儿童]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8520</th>\n",
       "      <td>5c024b58f625b7c00bfcddb4</td>\n",
       "      <td>[小林裕介, 坂本真绫, 村中知, Lynn, 木村珠莉, 河西健吾, 福岛润, 水树奈奈,...</td>\n",
       "      <td>[2016-12-10]</td>\n",
       "      <td>[江崎慎平]</td>\n",
       "      <td>26933232</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>[103分钟]</td>\n",
       "      <td>怪物弹珠 THE MOVIE モンスターストライク THE MOVIE はじまりの場所へ</td>\n",
       "      <td>[剧情, 动作, 动画, 儿童, 奇幻, 冒险]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8609</th>\n",
       "      <td>5c024b62f625b7c00bfce28a</td>\n",
       "      <td>[Mel Blanc]</td>\n",
       "      <td>[1979-11-27]</td>\n",
       "      <td>[Friz Freleng, Chuck Jones]</td>\n",
       "      <td>3545184</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Bugs Bunny's Looney Christmas Tales</td>\n",
       "      <td>[喜剧, 动画, 家庭]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8705</th>\n",
       "      <td>5c024b6af625b7c00bfce7d7</td>\n",
       "      <td>[Mel Blanc]</td>\n",
       "      <td>[1980-04-01]</td>\n",
       "      <td>[Friz Freleng]</td>\n",
       "      <td>3431696</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Daffy Ducks Easter Show</td>\n",
       "      <td>[动画]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8751</th>\n",
       "      <td>5c024b70f625b7c00bfceac6</td>\n",
       "      <td>[田中真弓, 富山敬, 岛田敏, 小原乃梨子, 坂本千夏]</td>\n",
       "      <td>[1983]</td>\n",
       "      <td>[笹川博]</td>\n",
       "      <td>5162570</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>时间飞船系列第七部 イタダキマン</td>\n",
       "      <td>[动画]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8783</th>\n",
       "      <td>5c024b73f625b7c00bfcebf8</td>\n",
       "      <td>[德本恭敏, 千叶纱子, 佐藤せつじ, 长岛雄一, 小林沙苗, 细井治, 川中子雅人, 小松...</td>\n",
       "      <td>[2007]</td>\n",
       "      <td>[Mamoru Kanbe]</td>\n",
       "      <td>5176755</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>鬼公子 炎魔：炎魔 Demon Prince Enma: Enma</td>\n",
       "      <td>[动画]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9318</th>\n",
       "      <td>5c024bc7f625b7c00bfd09d3</td>\n",
       "      <td>[Bette Davis, David Ackroyd, Rosanna Arquette,...</td>\n",
       "      <td>[1978-01-23]</td>\n",
       "      <td>[Leo Penn]</td>\n",
       "      <td>2083813</td>\n",
       "      <td>[English]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[USA]</td>\n",
       "      <td>None</td>\n",
       "      <td>破碎之家的黑暗秘密 \"The Dark Secret of Harvest Home\"</td>\n",
       "      <td>[剧情, 悬疑, 恐怖]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9783</th>\n",
       "      <td>5c024c54f625b7c00bfd71ff</td>\n",
       "      <td>[Niki Rubin, Yelena Sabel, Kerri Taylor]</td>\n",
       "      <td>[2004]</td>\n",
       "      <td>[Bill Zebub]</td>\n",
       "      <td>5363698</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[97分钟]</td>\n",
       "      <td>难逃魔掌 Jesus Christ: Serial Rapist</td>\n",
       "      <td>[犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10171</th>\n",
       "      <td>5c024c87f625b7c00bfd7d85</td>\n",
       "      <td>[Amrou Al-Kadhi, Nigel Allen, Ashley Campbell]</td>\n",
       "      <td>[2016-08-22(英国)]</td>\n",
       "      <td>[Simon Anderson, Kristen Bjorn, Karl Eccleston...</td>\n",
       "      <td>26853004</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[英国]</td>\n",
       "      <td>[2小时13分钟]</td>\n",
       "      <td>男孩电影15 Boys on Film 15</td>\n",
       "      <td>[喜剧, 同性, 犯罪, 奇幻]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10563</th>\n",
       "      <td>5c024caaf625b7c00bfd87cb</td>\n",
       "      <td>[Kathleen Robertson]</td>\n",
       "      <td>[2002-03-18]</td>\n",
       "      <td>[Alex Chapple]</td>\n",
       "      <td>1862712</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[加拿大]</td>\n",
       "      <td>[91分钟]</td>\n",
       "      <td>Torso: The Evelyn Dick Story</td>\n",
       "      <td>[犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11185</th>\n",
       "      <td>5c024cbef625b7c00bfd940f</td>\n",
       "      <td>[刘冠霖, 李洋, 高川, 胡彩虹, 矫菲]</td>\n",
       "      <td>[2014]</td>\n",
       "      <td>[郑锋]</td>\n",
       "      <td>26373030</td>\n",
       "      <td>[汉语普通话]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[中国大陆]</td>\n",
       "      <td>[90分钟]</td>\n",
       "      <td>黔山打拐</td>\n",
       "      <td>[剧情, 犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11425</th>\n",
       "      <td>5c024cc8f625b7c00bfd9c7d</td>\n",
       "      <td>[周孝安, 吴慷仁]</td>\n",
       "      <td>[2007]</td>\n",
       "      <td>[陈俊志]</td>\n",
       "      <td>2297254</td>\n",
       "      <td>[汉语普通话]</td>\n",
       "      <td>6.1</td>\n",
       "      <td>1122</td>\n",
       "      <td>[台湾]</td>\n",
       "      <td>[12分钟(公映版)]</td>\n",
       "      <td>沿海岸线征友 沿海岸线徵友</td>\n",
       "      <td>[剧情, 同性]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11830</th>\n",
       "      <td>5c024d88f625b7c00bfda7ce</td>\n",
       "      <td>[Linus Roache, James Cromwell, David Paymer, M...</td>\n",
       "      <td>[2002-08-25]</td>\n",
       "      <td>[Robert Dornhelm]</td>\n",
       "      <td>2073229</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.8</td>\n",
       "      <td>36</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>罗伯特·肯尼迪 RFK</td>\n",
       "      <td>[剧情, 传记, 历史]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12268</th>\n",
       "      <td>5c024de4f625b7c00bfdb573</td>\n",
       "      <td>[Victor Garber, Saul Rubinek, Michael Dolan, M...</td>\n",
       "      <td>[1988]</td>\n",
       "      <td>[David Greene]</td>\n",
       "      <td>5090965</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[加拿大]</td>\n",
       "      <td>None</td>\n",
       "      <td>Liberace: Behind the Music</td>\n",
       "      <td>[剧情, 同性, 音乐, 传记]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12395</th>\n",
       "      <td>5c024dfdf625b7c00bfdb89d</td>\n",
       "      <td>[米歇尔·李, 彼得·里格特, 詹姆斯·法伦蒂洛, 芭芭拉·帕金斯, 肯尼斯·威尔什]</td>\n",
       "      <td>[1998]</td>\n",
       "      <td>[Bruce McDonald]</td>\n",
       "      <td>5094442</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[加拿大]</td>\n",
       "      <td>[120分钟]</td>\n",
       "      <td>Scandalous Me: The Jacqueline Susann Story</td>\n",
       "      <td>[剧情, 传记]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13124</th>\n",
       "      <td>5c027d18f625b7c00bfe0098</td>\n",
       "      <td>[田中真弓, 大谷育江, 矢尾一树, 平田广明, 长岛雄一, 中井和哉, 冈村明美, 山口由里子]</td>\n",
       "      <td>[1999]</td>\n",
       "      <td>[宇田钢之介]</td>\n",
       "      <td>5339100</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>7.1</td>\n",
       "      <td>488</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>[18分钟]</td>\n",
       "      <td>海贼王特别篇2路飞的绝技 海贼王SP02  路飞的绝技</td>\n",
       "      <td>[剧情, 动画, 冒险]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13549</th>\n",
       "      <td>5c028621f625b7c00bfe282e</td>\n",
       "      <td>[许飞, 黄觉, 黄渤, 张炜迅, 刘畅, 陈国新, 陈升]</td>\n",
       "      <td>[2007-09-06]</td>\n",
       "      <td>[韩可一]</td>\n",
       "      <td>2237966</td>\n",
       "      <td>[汉语普通话]</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2374</td>\n",
       "      <td>[中国]</td>\n",
       "      <td>None</td>\n",
       "      <td>飞行日志</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16084</th>\n",
       "      <td>5c0288ecf625b7c00bfe6e97</td>\n",
       "      <td>[George Carlin]</td>\n",
       "      <td>[1992]</td>\n",
       "      <td>[Rocco Urbisci]</td>\n",
       "      <td>3706177</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>9.2</td>\n",
       "      <td>219</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>乔治·卡林：挤在纽约 George Carlin: Jammin' in New York</td>\n",
       "      <td>[喜剧, 脱口秀]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16475</th>\n",
       "      <td>5c0289e8f625b7c00bfe7abb</td>\n",
       "      <td>[乔治·卡林]</td>\n",
       "      <td>[1988]</td>\n",
       "      <td>[Bruce Gowers]</td>\n",
       "      <td>4719672</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.8</td>\n",
       "      <td>88</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>乔治·卡林：我在新泽西干啥呀？ George Carlin: What Am I Doing...</td>\n",
       "      <td>[喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17001</th>\n",
       "      <td>5c028a48f625b7c00bfe8805</td>\n",
       "      <td>[拉德·马尔科维奇, 波格丹·迪格里克, 德拉甘·尼柯立克]</td>\n",
       "      <td>[1989]</td>\n",
       "      <td>[戈兰·马克维奇]</td>\n",
       "      <td>3749810</td>\n",
       "      <td>[塞尔维亚语]</td>\n",
       "      <td>7.8</td>\n",
       "      <td>53</td>\n",
       "      <td>[南斯拉夫]</td>\n",
       "      <td>[98分钟]</td>\n",
       "      <td>交会点 Sabirni centar</td>\n",
       "      <td>[剧情, 喜剧, 奇幻]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17538</th>\n",
       "      <td>5c028a83f625b7c00bfe9189</td>\n",
       "      <td>[Alyson Michalka, Amanda Michalka]</td>\n",
       "      <td>[2006-03-24]</td>\n",
       "      <td>[Francine McDougall]</td>\n",
       "      <td>1823230</td>\n",
       "      <td>[English]</td>\n",
       "      <td>5.6</td>\n",
       "      <td>484</td>\n",
       "      <td>[USA]</td>\n",
       "      <td>None</td>\n",
       "      <td>奶牛美女 Cow Belles</td>\n",
       "      <td>[喜剧, 爱情, 家庭]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18007</th>\n",
       "      <td>5c028abbf625b7c00bfea03a</td>\n",
       "      <td>[海伦·米伦, Brian Stirner, 理查德·帕斯科, 安格哈拉德·瑞丝, 詹姆斯·...</td>\n",
       "      <td>[1978-12-17]</td>\n",
       "      <td>[Basil Coleman]</td>\n",
       "      <td>1958686</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>8.3</td>\n",
       "      <td>54</td>\n",
       "      <td>[英国]</td>\n",
       "      <td>None</td>\n",
       "      <td>皆大欢喜 As you like it</td>\n",
       "      <td>[喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18466</th>\n",
       "      <td>5c028afcf625b7c00bfea4e2</td>\n",
       "      <td>[Jim Thornton ...  Johnny Gomez, Chris Edgerly...</td>\n",
       "      <td>[1998-01-25]</td>\n",
       "      <td>[Eric Fogel]</td>\n",
       "      <td>2054639</td>\n",
       "      <td>[english]</td>\n",
       "      <td>8.5</td>\n",
       "      <td>121</td>\n",
       "      <td>[usa]</td>\n",
       "      <td>None</td>\n",
       "      <td>MTV 名人死斗 Celebrity Deathmatch</td>\n",
       "      <td>[喜剧, 动作, 动画]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18677</th>\n",
       "      <td>5c028ea7f625b7c00bff0a25</td>\n",
       "      <td>[乔·佩西, 布兰登·费舍, 莫伊拉·凯利, 帕特里克·德姆西]</td>\n",
       "      <td>[1994-04-29]</td>\n",
       "      <td>[阿莱克·凯西西恩]</td>\n",
       "      <td>1294104</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>8.1</td>\n",
       "      <td>420</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>乞丐博士 With Honors</td>\n",
       "      <td>[剧情, 喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20417</th>\n",
       "      <td>5c029707f625b7c00bff4454</td>\n",
       "      <td>[杜夫·龙格尔]</td>\n",
       "      <td>[2008-01-15]</td>\n",
       "      <td>[Dolph Lundgren]</td>\n",
       "      <td>2379445</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>6.1</td>\n",
       "      <td>90</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>摩托教父 Missionary Man</td>\n",
       "      <td>[剧情, 动作, 惊悚]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21279</th>\n",
       "      <td>5c029a21f625b7c00bff60ab</td>\n",
       "      <td>[Edward Atterton, Amanda Ryan, Jacqueline Bisset]</td>\n",
       "      <td>[2000-01-09]</td>\n",
       "      <td>[布赖恩 特伦查德－史密斯 (Brian Trenchard-Smith)]</td>\n",
       "      <td>1302656</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.3</td>\n",
       "      <td>96</td>\n",
       "      <td>[英国 / 美国]</td>\n",
       "      <td>[90分钟]</td>\n",
       "      <td>烈血英伦 Britannic</td>\n",
       "      <td>[剧情, 动作, 爱情, 历史, 战争, 灾难]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97942</th>\n",
       "      <td>5c04ea84f625b7c00b135c2c</td>\n",
       "      <td>[Karin Petersen, Nicolas Silberg]</td>\n",
       "      <td>[1971]</td>\n",
       "      <td>[Yannick Andréi]</td>\n",
       "      <td>5148594</td>\n",
       "      <td>[法语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[法国]</td>\n",
       "      <td>None</td>\n",
       "      <td>蒙梭罗夫人 La dame de Monsoreau</td>\n",
       "      <td>[剧情, 历史]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99273</th>\n",
       "      <td>5c04f3edf625b7c00b170049</td>\n",
       "      <td>[费奥纳·肖, 格拉汉姆·克罗登, 理查德·布雷默, 朱利安·林希德-图特]</td>\n",
       "      <td>[1997]</td>\n",
       "      <td>[Deborah Warner]</td>\n",
       "      <td>5109071</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[英国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Richard II</td>\n",
       "      <td>[剧情, 历史]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99336</th>\n",
       "      <td>5c04f407f625b7c00b1720fc</td>\n",
       "      <td>[卡斯派·范·戴恩, 杰姆·沃尔福特, 马克·威廉斯]</td>\n",
       "      <td>[2001-11-10]</td>\n",
       "      <td>[Sidney J. Furie]</td>\n",
       "      <td>1303653</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[加拿大]</td>\n",
       "      <td>[113分钟]</td>\n",
       "      <td>回家 Going Back</td>\n",
       "      <td>[剧情, 战争]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99905</th>\n",
       "      <td>5c05ce35f625b7c00b186484</td>\n",
       "      <td>[Bill Hicks]</td>\n",
       "      <td>[1993]</td>\n",
       "      <td>[Chris Bould]</td>\n",
       "      <td>4935206</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>6.6</td>\n",
       "      <td>40</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>比尔·希克斯：启示录 Bill Hicks: Revelations</td>\n",
       "      <td>[喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100301</th>\n",
       "      <td>5c05cf8cf625b7c00b1afc2e</td>\n",
       "      <td>[Melissa Gilbert, Bruce Boxleitner]</td>\n",
       "      <td>[1995-09-17]</td>\n",
       "      <td>[Richard A. Colla]</td>\n",
       "      <td>3087636</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[171分钟]</td>\n",
       "      <td>卓雅 Zoya</td>\n",
       "      <td>[剧情, 历史]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100742</th>\n",
       "      <td>5c05f5cdf625b7c00b2c325e</td>\n",
       "      <td>[米夏埃尔·圭斯德克, Myriam Abbas, 苏珊妮·博曼, 多米尼克·霍卫兹, Ol...</td>\n",
       "      <td>[1999-08-12]</td>\n",
       "      <td>[安德里亚斯·德里森]</td>\n",
       "      <td>1302633</td>\n",
       "      <td>[德语]</td>\n",
       "      <td>8.9</td>\n",
       "      <td>37</td>\n",
       "      <td>[德国]</td>\n",
       "      <td>None</td>\n",
       "      <td>晚的形状 Nachtgestalten</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101479</th>\n",
       "      <td>5c05f61ff625b7c00b2c66d0</td>\n",
       "      <td>[孟俊, 朱亚英]</td>\n",
       "      <td>[2010]</td>\n",
       "      <td>[丁 正]</td>\n",
       "      <td>5342064</td>\n",
       "      <td>[普通话/国语]</td>\n",
       "      <td>7.6</td>\n",
       "      <td>96</td>\n",
       "      <td>[中国大陆]</td>\n",
       "      <td>None</td>\n",
       "      <td>微笑</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101726</th>\n",
       "      <td>5c05f63bf625b7c00b2c73e2</td>\n",
       "      <td>[Stefanie Stappenbeck, Benjamin Sadler, Yvonne...</td>\n",
       "      <td>[2004-01-22]</td>\n",
       "      <td>[乌特·维兰德]</td>\n",
       "      <td>2069799</td>\n",
       "      <td>[German]</td>\n",
       "      <td>7.6</td>\n",
       "      <td>44</td>\n",
       "      <td>[Germany]</td>\n",
       "      <td>None</td>\n",
       "      <td>意大利人和他们的甜点 Italiener und andere Süßigkeiten</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102244</th>\n",
       "      <td>5c05f682f625b7c00b2cbdf7</td>\n",
       "      <td>[李静, 玛利娅, 许烨]</td>\n",
       "      <td>[2009]</td>\n",
       "      <td>[方军亮]</td>\n",
       "      <td>3546155</td>\n",
       "      <td>[普通话]</td>\n",
       "      <td>5.7</td>\n",
       "      <td>370</td>\n",
       "      <td>[中国大陆]</td>\n",
       "      <td>[95分钟]</td>\n",
       "      <td>土婆婆PK洋媳妇</td>\n",
       "      <td>[剧情, 喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108960</th>\n",
       "      <td>5c05fb02f625b7c00b30d136</td>\n",
       "      <td>[帕拉巴斯 Prabhas, 特丽莎·克里希南, Mohan Babu]</td>\n",
       "      <td>[2008]</td>\n",
       "      <td>[Puri Jagannath]</td>\n",
       "      <td>5185546</td>\n",
       "      <td>[泰卢固语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[印度]</td>\n",
       "      <td>[150分钟]</td>\n",
       "      <td>Bujjigaadu: Made in Chennai</td>\n",
       "      <td>[动作]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110769</th>\n",
       "      <td>5c05fba0f625b7c00b312e2a</td>\n",
       "      <td>[Parviz Parastouei, Darine Hamze, Farideh Sepa...</td>\n",
       "      <td>[2009-03-25]</td>\n",
       "      <td>[Maziar Bahari]</td>\n",
       "      <td>3629234</td>\n",
       "      <td>[伊朗语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[伊朗]</td>\n",
       "      <td>[95分钟]</td>\n",
       "      <td>律法之书 The Book of Law</td>\n",
       "      <td>[剧情, 爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110926</th>\n",
       "      <td>5c05fbaef625b7c00b313491</td>\n",
       "      <td>[李炳辰 Bingchen Li, 初星一, 焦婷]</td>\n",
       "      <td>[2011-09]</td>\n",
       "      <td>[武圣基]</td>\n",
       "      <td>10453724</td>\n",
       "      <td>[汉语普通话]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[中国大陆]</td>\n",
       "      <td>None</td>\n",
       "      <td>万年飘香</td>\n",
       "      <td>[爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111184</th>\n",
       "      <td>5c05fbc3f625b7c00b313fbd</td>\n",
       "      <td>[琳恩·柯林斯, 凯丽·加纳, 贾斯汀·塞洛克斯, 弗兰克·兰格拉]</td>\n",
       "      <td>[2006]</td>\n",
       "      <td>[Nanda Anand]</td>\n",
       "      <td>5221260</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[90分钟]</td>\n",
       "      <td>Return to Rajapur</td>\n",
       "      <td>[剧情, 爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113214</th>\n",
       "      <td>5c05fccef625b7c00b322c95</td>\n",
       "      <td>[Jenna Dewan, Ashley Benson]</td>\n",
       "      <td>[2008-08-02]</td>\n",
       "      <td>[Tom McLoughlin]</td>\n",
       "      <td>3852275</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>靓丽五人组：德州拉拉队丑闻 Fab Five: The Texas Cheerleader ...</td>\n",
       "      <td>[惊悚]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115589</th>\n",
       "      <td>5c05fdaff625b7c00b32a48a</td>\n",
       "      <td>[Ben Gazzara, Mickey Rooney, Kirk Douglas, Cyd...</td>\n",
       "      <td>[2008-08-14]</td>\n",
       "      <td>[William Karel]</td>\n",
       "      <td>4215281</td>\n",
       "      <td>[英语 / 法语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国 / 法国]</td>\n",
       "      <td>[88分钟]</td>\n",
       "      <td>Meurtres à l'Empire State Building (TV)</td>\n",
       "      <td>[犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115608</th>\n",
       "      <td>5c05fdb1f625b7c00b32a6dd</td>\n",
       "      <td>[麦克斯·瑞恩, 迪奥拉·拜尔德, Steph DuVall, Chris Clevelan...</td>\n",
       "      <td>[2010]</td>\n",
       "      <td>[Aram Boyrazian]</td>\n",
       "      <td>5184288</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Dry Run</td>\n",
       "      <td>[剧情, 犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115616</th>\n",
       "      <td>5c05fdb2f625b7c00b32a737</td>\n",
       "      <td>[Sebastian Ströbel, 奇娅拉·施浩拉斯, Oliver Stritzel,...</td>\n",
       "      <td>[2009]</td>\n",
       "      <td>[Heinz Dietz, Christian Theede, Alexander Dier...</td>\n",
       "      <td>5166443</td>\n",
       "      <td>[德语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[德国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Countdown - Die Jagd beginnt</td>\n",
       "      <td>[犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115976</th>\n",
       "      <td>5c05fdd2f625b7c00b32bd6a</td>\n",
       "      <td>[艾斯-T, 伊娃·门德斯, 爱琳·丹尼尔斯, 李威尹, Greg Lauren, Patr...</td>\n",
       "      <td>[2000-08-24]</td>\n",
       "      <td>[黄志强 Wong Chi-Keung]</td>\n",
       "      <td>1307974</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>追杀金手臂 The Disciples</td>\n",
       "      <td>[动作, 犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117259</th>\n",
       "      <td>5c05ffe3f625b7c00b3498d1</td>\n",
       "      <td>[Meredith Host, Kurt Indovina, Billy Garberina]</td>\n",
       "      <td>[2009-11-12]</td>\n",
       "      <td>[Chris Seaver]</td>\n",
       "      <td>4139890</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>4.3</td>\n",
       "      <td>191</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[60分钟]</td>\n",
       "      <td>污染光 Taintlight</td>\n",
       "      <td>[喜剧]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117382</th>\n",
       "      <td>5c060034f625b7c00b3520ac</td>\n",
       "      <td>[德鲁·巴里摩尔, 莎拉·吉尔伯特, 汤姆·斯凯里特, 切瑞·拉德]</td>\n",
       "      <td>[1985]</td>\n",
       "      <td>[Katt Shea]</td>\n",
       "      <td>5083769</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>欲海潮 Poison Ivy(TV 1985)</td>\n",
       "      <td>[喜剧, 爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117467</th>\n",
       "      <td>5c060703f625b7c00b3bd14f</td>\n",
       "      <td>[Aidan Quinn, Jared Harris]</td>\n",
       "      <td>[2000-02-01]</td>\n",
       "      <td>[米迦·琳赛-霍格]</td>\n",
       "      <td>1302658</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>8.5</td>\n",
       "      <td>74</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>None</td>\n",
       "      <td>披头四外传 Two of Us (TV)</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117471</th>\n",
       "      <td>5c060706f625b7c00b3bd54e</td>\n",
       "      <td>[中尾隆聖, 神谷明]</td>\n",
       "      <td>[1983-10-20]</td>\n",
       "      <td>[小華和ためお, 本郷みつる]</td>\n",
       "      <td>2268727</td>\n",
       "      <td>[日语]</td>\n",
       "      <td>8.7</td>\n",
       "      <td>821</td>\n",
       "      <td>[日本]</td>\n",
       "      <td>None</td>\n",
       "      <td>忍者小英雄 伊賀野カバ丸</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117535</th>\n",
       "      <td>5c060718f625b7c00b3be4d0</td>\n",
       "      <td>[Jordan Santoul, Clémentine Célarié, Xavier Ro...</td>\n",
       "      <td>[1998-11-04]</td>\n",
       "      <td>[Jérôme Foulon]</td>\n",
       "      <td>1474251</td>\n",
       "      <td>[法语]</td>\n",
       "      <td>8.0</td>\n",
       "      <td>66</td>\n",
       "      <td>[法国]</td>\n",
       "      <td>[90分钟]</td>\n",
       "      <td>爱，都有一段故事 Il n'y a pas d'amour sans histoires</td>\n",
       "      <td>[剧情, 儿童]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>121850</th>\n",
       "      <td>5c060db3f625b7c00b41034c</td>\n",
       "      <td>[Rebecca Immanuel, Julia Brendler, Anna Hausbu...</td>\n",
       "      <td>[2010]</td>\n",
       "      <td>[John Delbridge]</td>\n",
       "      <td>5156219</td>\n",
       "      <td>[德语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[德国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Katie Fforde - Festtagsstimmung</td>\n",
       "      <td>[剧情, 爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122203</th>\n",
       "      <td>5c060debf625b7c00b411b2b</td>\n",
       "      <td>[汉斯·沃那·麦耶, 莉萨·马丁内克]</td>\n",
       "      <td>[2006]</td>\n",
       "      <td>[Michael Rowitz]</td>\n",
       "      <td>5211917</td>\n",
       "      <td>[德语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[德国]</td>\n",
       "      <td>None</td>\n",
       "      <td>Ich leih mir eine Familie</td>\n",
       "      <td>[剧情, 喜剧, 爱情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123061</th>\n",
       "      <td>5c060f20f625b7c00b420b8b</td>\n",
       "      <td>[Iris Berben, Heino Ferch, Michael Nyqvist]</td>\n",
       "      <td>[2010]</td>\n",
       "      <td>[Urs Egger]</td>\n",
       "      <td>5176492</td>\n",
       "      <td>[德语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[德国]</td>\n",
       "      <td>[180分钟]</td>\n",
       "      <td>Kennedys Hirn</td>\n",
       "      <td>[惊悚]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124708</th>\n",
       "      <td>5c061050f625b7c00b42b3d3</td>\n",
       "      <td>[萨缪尔·韦斯特, Patrick Drury, 尼古拉斯·法瑞尔]</td>\n",
       "      <td>[1994]</td>\n",
       "      <td>[Diarmuid Lawrence]</td>\n",
       "      <td>5087337</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[英国]</td>\n",
       "      <td>None</td>\n",
       "      <td>A Breed of Heroes</td>\n",
       "      <td>[剧情, 喜剧, 犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125293</th>\n",
       "      <td>5c06211df625b7c00b4c52d7</td>\n",
       "      <td>[Danielle Panabaker, Mercedes Ruehl, Jane Krak...</td>\n",
       "      <td>[2005-09-08]</td>\n",
       "      <td>[Peter Werner]</td>\n",
       "      <td>1444492</td>\n",
       "      <td>[English]</td>\n",
       "      <td>7.8</td>\n",
       "      <td>150</td>\n",
       "      <td>[USA]</td>\n",
       "      <td>[90分钟]</td>\n",
       "      <td>妈妈十六岁 mom at sixteen</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126492</th>\n",
       "      <td>5c06280cf625b7c00b5299c6</td>\n",
       "      <td>[加布埃尔·加科, Alessandro Adriano, Gerardo Amato, S...</td>\n",
       "      <td>[2008]</td>\n",
       "      <td>[Monica Vullo]</td>\n",
       "      <td>5186729</td>\n",
       "      <td>[意大利语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[意大利]</td>\n",
       "      <td>None</td>\n",
       "      <td>我赦免你 Io ti assolvo</td>\n",
       "      <td>[犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126570</th>\n",
       "      <td>5c062841f625b7c00b52bab9</td>\n",
       "      <td>[布莱恩·布鲁姆, 克里斯托弗斯·阿特金斯, 伊丽莎白·伯克利]</td>\n",
       "      <td>[1994]</td>\n",
       "      <td>[Hal Needham]</td>\n",
       "      <td>5111802</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[94分钟]</td>\n",
       "      <td>Bandit: Bandit Goes Country</td>\n",
       "      <td>[喜剧, 犯罪]</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>224 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             _id  \\\n",
       "1263    5c02453bf625b7c00bfc1f04   \n",
       "1546    5c024543f625b7c00bfc213c   \n",
       "4417    5c0246f5f625b7c00bfc676e   \n",
       "7490    5c024aa2f625b7c00bfcb5a3   \n",
       "8467    5c024b52f625b7c00bfcdb69   \n",
       "8520    5c024b58f625b7c00bfcddb4   \n",
       "8609    5c024b62f625b7c00bfce28a   \n",
       "8705    5c024b6af625b7c00bfce7d7   \n",
       "8751    5c024b70f625b7c00bfceac6   \n",
       "8783    5c024b73f625b7c00bfcebf8   \n",
       "9318    5c024bc7f625b7c00bfd09d3   \n",
       "9783    5c024c54f625b7c00bfd71ff   \n",
       "10171   5c024c87f625b7c00bfd7d85   \n",
       "10563   5c024caaf625b7c00bfd87cb   \n",
       "11185   5c024cbef625b7c00bfd940f   \n",
       "11425   5c024cc8f625b7c00bfd9c7d   \n",
       "11830   5c024d88f625b7c00bfda7ce   \n",
       "12268   5c024de4f625b7c00bfdb573   \n",
       "12395   5c024dfdf625b7c00bfdb89d   \n",
       "13124   5c027d18f625b7c00bfe0098   \n",
       "13549   5c028621f625b7c00bfe282e   \n",
       "16084   5c0288ecf625b7c00bfe6e97   \n",
       "16475   5c0289e8f625b7c00bfe7abb   \n",
       "17001   5c028a48f625b7c00bfe8805   \n",
       "17538   5c028a83f625b7c00bfe9189   \n",
       "18007   5c028abbf625b7c00bfea03a   \n",
       "18466   5c028afcf625b7c00bfea4e2   \n",
       "18677   5c028ea7f625b7c00bff0a25   \n",
       "20417   5c029707f625b7c00bff4454   \n",
       "21279   5c029a21f625b7c00bff60ab   \n",
       "...                          ...   \n",
       "97942   5c04ea84f625b7c00b135c2c   \n",
       "99273   5c04f3edf625b7c00b170049   \n",
       "99336   5c04f407f625b7c00b1720fc   \n",
       "99905   5c05ce35f625b7c00b186484   \n",
       "100301  5c05cf8cf625b7c00b1afc2e   \n",
       "100742  5c05f5cdf625b7c00b2c325e   \n",
       "101479  5c05f61ff625b7c00b2c66d0   \n",
       "101726  5c05f63bf625b7c00b2c73e2   \n",
       "102244  5c05f682f625b7c00b2cbdf7   \n",
       "108960  5c05fb02f625b7c00b30d136   \n",
       "110769  5c05fba0f625b7c00b312e2a   \n",
       "110926  5c05fbaef625b7c00b313491   \n",
       "111184  5c05fbc3f625b7c00b313fbd   \n",
       "113214  5c05fccef625b7c00b322c95   \n",
       "115589  5c05fdaff625b7c00b32a48a   \n",
       "115608  5c05fdb1f625b7c00b32a6dd   \n",
       "115616  5c05fdb2f625b7c00b32a737   \n",
       "115976  5c05fdd2f625b7c00b32bd6a   \n",
       "117259  5c05ffe3f625b7c00b3498d1   \n",
       "117382  5c060034f625b7c00b3520ac   \n",
       "117467  5c060703f625b7c00b3bd14f   \n",
       "117471  5c060706f625b7c00b3bd54e   \n",
       "117535  5c060718f625b7c00b3be4d0   \n",
       "121850  5c060db3f625b7c00b41034c   \n",
       "122203  5c060debf625b7c00b411b2b   \n",
       "123061  5c060f20f625b7c00b420b8b   \n",
       "124708  5c061050f625b7c00b42b3d3   \n",
       "125293  5c06211df625b7c00b4c52d7   \n",
       "126492  5c06280cf625b7c00b5299c6   \n",
       "126570  5c062841f625b7c00b52bab9   \n",
       "\n",
       "                                                    actor              date  \\\n",
       "1263                 [松平哲郎, 品川美月, 松永美規, みぶ真也, 浅尾典彦, 宮瀬れい]      [2010-07-23]   \n",
       "1546       [Christian Campbell, Sarah Chalke, Carly Pope]      [1998-03-22]   \n",
       "4417                                   [羽田圭子, 羽田圭子, 雨宮夕子]  [1997-02-28(日本)]   \n",
       "7490    [欧阳震华, 李铭顺, 张世, 徐峥, 寇振海, 保剑锋, 宁静, 范文芳, 牛萌萌, 唐一...            [2005]   \n",
       "8467                [笠原弘子, 坂本千夏, 荒川太郎, 渡辺真砂子, 椎名碧流, 勝田治美]      [1993-08-14]   \n",
       "8520    [小林裕介, 坂本真绫, 村中知, Lynn, 木村珠莉, 河西健吾, 福岛润, 水树奈奈,...      [2016-12-10]   \n",
       "8609                                          [Mel Blanc]      [1979-11-27]   \n",
       "8705                                          [Mel Blanc]      [1980-04-01]   \n",
       "8751                        [田中真弓, 富山敬, 岛田敏, 小原乃梨子, 坂本千夏]            [1983]   \n",
       "8783    [德本恭敏, 千叶纱子, 佐藤せつじ, 长岛雄一, 小林沙苗, 细井治, 川中子雅人, 小松...            [2007]   \n",
       "9318    [Bette Davis, David Ackroyd, Rosanna Arquette,...      [1978-01-23]   \n",
       "9783             [Niki Rubin, Yelena Sabel, Kerri Taylor]            [2004]   \n",
       "10171      [Amrou Al-Kadhi, Nigel Allen, Ashley Campbell]  [2016-08-22(英国)]   \n",
       "10563                                [Kathleen Robertson]      [2002-03-18]   \n",
       "11185                              [刘冠霖, 李洋, 高川, 胡彩虹, 矫菲]            [2014]   \n",
       "11425                                          [周孝安, 吴慷仁]            [2007]   \n",
       "11830   [Linus Roache, James Cromwell, David Paymer, M...      [2002-08-25]   \n",
       "12268   [Victor Garber, Saul Rubinek, Michael Dolan, M...            [1988]   \n",
       "12395         [米歇尔·李, 彼得·里格特, 詹姆斯·法伦蒂洛, 芭芭拉·帕金斯, 肯尼斯·威尔什]            [1998]   \n",
       "13124   [田中真弓, 大谷育江, 矢尾一树, 平田广明, 长岛雄一, 中井和哉, 冈村明美, 山口由里子]            [1999]   \n",
       "13549                      [许飞, 黄觉, 黄渤, 张炜迅, 刘畅, 陈国新, 陈升]      [2007-09-06]   \n",
       "16084                                     [George Carlin]            [1992]   \n",
       "16475                                             [乔治·卡林]            [1988]   \n",
       "17001                      [拉德·马尔科维奇, 波格丹·迪格里克, 德拉甘·尼柯立克]            [1989]   \n",
       "17538                  [Alyson Michalka, Amanda Michalka]      [2006-03-24]   \n",
       "18007   [海伦·米伦, Brian Stirner, 理查德·帕斯科, 安格哈拉德·瑞丝, 詹姆斯·...      [1978-12-17]   \n",
       "18466   [Jim Thornton ...  Johnny Gomez, Chris Edgerly...      [1998-01-25]   \n",
       "18677                    [乔·佩西, 布兰登·费舍, 莫伊拉·凯利, 帕特里克·德姆西]      [1994-04-29]   \n",
       "20417                                            [杜夫·龙格尔]      [2008-01-15]   \n",
       "21279   [Edward Atterton, Amanda Ryan, Jacqueline Bisset]      [2000-01-09]   \n",
       "...                                                   ...               ...   \n",
       "97942                   [Karin Petersen, Nicolas Silberg]            [1971]   \n",
       "99273              [费奥纳·肖, 格拉汉姆·克罗登, 理查德·布雷默, 朱利安·林希德-图特]            [1997]   \n",
       "99336                         [卡斯派·范·戴恩, 杰姆·沃尔福特, 马克·威廉斯]      [2001-11-10]   \n",
       "99905                                        [Bill Hicks]            [1993]   \n",
       "100301                [Melissa Gilbert, Bruce Boxleitner]      [1995-09-17]   \n",
       "100742  [米夏埃尔·圭斯德克, Myriam Abbas, 苏珊妮·博曼, 多米尼克·霍卫兹, Ol...      [1999-08-12]   \n",
       "101479                                          [孟俊, 朱亚英]            [2010]   \n",
       "101726  [Stefanie Stappenbeck, Benjamin Sadler, Yvonne...      [2004-01-22]   \n",
       "102244                                      [李静, 玛利娅, 许烨]            [2009]   \n",
       "108960               [帕拉巴斯 Prabhas, 特丽莎·克里希南, Mohan Babu]            [2008]   \n",
       "110769  [Parviz Parastouei, Darine Hamze, Farideh Sepa...      [2009-03-25]   \n",
       "110926                         [李炳辰 Bingchen Li, 初星一, 焦婷]         [2011-09]   \n",
       "111184                 [琳恩·柯林斯, 凯丽·加纳, 贾斯汀·塞洛克斯, 弗兰克·兰格拉]            [2006]   \n",
       "113214                       [Jenna Dewan, Ashley Benson]      [2008-08-02]   \n",
       "115589  [Ben Gazzara, Mickey Rooney, Kirk Douglas, Cyd...      [2008-08-14]   \n",
       "115608  [麦克斯·瑞恩, 迪奥拉·拜尔德, Steph DuVall, Chris Clevelan...            [2010]   \n",
       "115616  [Sebastian Ströbel, 奇娅拉·施浩拉斯, Oliver Stritzel,...            [2009]   \n",
       "115976  [艾斯-T, 伊娃·门德斯, 爱琳·丹尼尔斯, 李威尹, Greg Lauren, Patr...      [2000-08-24]   \n",
       "117259    [Meredith Host, Kurt Indovina, Billy Garberina]      [2009-11-12]   \n",
       "117382                 [德鲁·巴里摩尔, 莎拉·吉尔伯特, 汤姆·斯凯里特, 切瑞·拉德]            [1985]   \n",
       "117467                        [Aidan Quinn, Jared Harris]      [2000-02-01]   \n",
       "117471                                        [中尾隆聖, 神谷明]      [1983-10-20]   \n",
       "117535  [Jordan Santoul, Clémentine Célarié, Xavier Ro...      [1998-11-04]   \n",
       "121850  [Rebecca Immanuel, Julia Brendler, Anna Hausbu...            [2010]   \n",
       "122203                                [汉斯·沃那·麦耶, 莉萨·马丁内克]            [2006]   \n",
       "123061        [Iris Berben, Heino Ferch, Michael Nyqvist]            [2010]   \n",
       "124708                 [萨缪尔·韦斯特, Patrick Drury, 尼古拉斯·法瑞尔]            [1994]   \n",
       "125293  [Danielle Panabaker, Mercedes Ruehl, Jane Krak...      [2005-09-08]   \n",
       "126492  [加布埃尔·加科, Alessandro Adriano, Gerardo Amato, S...            [2008]   \n",
       "126570                   [布莱恩·布鲁姆, 克里斯托弗斯·阿特金斯, 伊丽莎白·伯克利]            [1994]   \n",
       "\n",
       "                                                 director        id  \\\n",
       "1263                                              [阿見松ノ介]   5273374   \n",
       "1546                                 [Christopher Leitch]   1298504   \n",
       "4417                                               [伊藤正治]  26947984   \n",
       "7490                                                 [刘志]   1830531   \n",
       "8467                                              [难波日登志]   5349437   \n",
       "8520                                               [江崎慎平]  26933232   \n",
       "8609                          [Friz Freleng, Chuck Jones]   3545184   \n",
       "8705                                       [Friz Freleng]   3431696   \n",
       "8751                                                [笹川博]   5162570   \n",
       "8783                                       [Mamoru Kanbe]   5176755   \n",
       "9318                                           [Leo Penn]   2083813   \n",
       "9783                                         [Bill Zebub]   5363698   \n",
       "10171   [Simon Anderson, Kristen Bjorn, Karl Eccleston...  26853004   \n",
       "10563                                      [Alex Chapple]   1862712   \n",
       "11185                                                [郑锋]  26373030   \n",
       "11425                                               [陈俊志]   2297254   \n",
       "11830                                   [Robert Dornhelm]   2073229   \n",
       "12268                                      [David Greene]   5090965   \n",
       "12395                                    [Bruce McDonald]   5094442   \n",
       "13124                                             [宇田钢之介]   5339100   \n",
       "13549                                               [韩可一]   2237966   \n",
       "16084                                     [Rocco Urbisci]   3706177   \n",
       "16475                                      [Bruce Gowers]   4719672   \n",
       "17001                                           [戈兰·马克维奇]   3749810   \n",
       "17538                                [Francine McDougall]   1823230   \n",
       "18007                                     [Basil Coleman]   1958686   \n",
       "18466                                        [Eric Fogel]   2054639   \n",
       "18677                                          [阿莱克·凯西西恩]   1294104   \n",
       "20417                                    [Dolph Lundgren]   2379445   \n",
       "21279              [布赖恩 特伦查德－史密斯 (Brian Trenchard-Smith)]   1302656   \n",
       "...                                                   ...       ...   \n",
       "97942                                    [Yannick Andréi]   5148594   \n",
       "99273                                    [Deborah Warner]   5109071   \n",
       "99336                                   [Sidney J. Furie]   1303653   \n",
       "99905                                       [Chris Bould]   4935206   \n",
       "100301                                 [Richard A. Colla]   3087636   \n",
       "100742                                        [安德里亚斯·德里森]   1302633   \n",
       "101479                                              [丁 正]   5342064   \n",
       "101726                                           [乌特·维兰德]   2069799   \n",
       "102244                                              [方军亮]   3546155   \n",
       "108960                                   [Puri Jagannath]   5185546   \n",
       "110769                                    [Maziar Bahari]   3629234   \n",
       "110926                                              [武圣基]  10453724   \n",
       "111184                                      [Nanda Anand]   5221260   \n",
       "113214                                   [Tom McLoughlin]   3852275   \n",
       "115589                                    [William Karel]   4215281   \n",
       "115608                                   [Aram Boyrazian]   5184288   \n",
       "115616  [Heinz Dietz, Christian Theede, Alexander Dier...   5166443   \n",
       "115976                               [黄志强 Wong Chi-Keung]   1307974   \n",
       "117259                                     [Chris Seaver]   4139890   \n",
       "117382                                        [Katt Shea]   5083769   \n",
       "117467                                         [米迦·琳赛-霍格]   1302658   \n",
       "117471                                    [小華和ためお, 本郷みつる]   2268727   \n",
       "117535                                    [Jérôme Foulon]   1474251   \n",
       "121850                                   [John Delbridge]   5156219   \n",
       "122203                                   [Michael Rowitz]   5211917   \n",
       "123061                                        [Urs Egger]   5176492   \n",
       "124708                                [Diarmuid Lawrence]   5087337   \n",
       "125293                                     [Peter Werner]   1444492   \n",
       "126492                                     [Monica Vullo]   5186729   \n",
       "126570                                      [Hal Needham]   5111802   \n",
       "\n",
       "         language  rate rating_num     region      runtime  \\\n",
       "1263         [日语]  None       None       [日本]         None   \n",
       "1546         [英语]  None       None       [美国]         None   \n",
       "4417         [日语]  None       None       [日本]         None   \n",
       "7490        [普通话]   6.4        724     [中国大陆]         None   \n",
       "8467         [日语]  None       None       [日本]         None   \n",
       "8520         [日语]  None       None       [日本]      [103分钟]   \n",
       "8609         [英语]  None       None       [美国]         None   \n",
       "8705         [英语]  None       None       [美国]         None   \n",
       "8751         [日语]  None       None       [日本]         None   \n",
       "8783         [日语]  None       None       [日本]         None   \n",
       "9318    [English]  None       None      [USA]         None   \n",
       "9783         [英语]  None       None       [美国]       [97分钟]   \n",
       "10171        [英语]  None       None       [英国]    [2小时13分钟]   \n",
       "10563        [英语]  None       None      [加拿大]       [91分钟]   \n",
       "11185     [汉语普通话]  None       None     [中国大陆]       [90分钟]   \n",
       "11425     [汉语普通话]   6.1       1122       [台湾]  [12分钟(公映版)]   \n",
       "11830        [英语]   7.8         36       [美国]         None   \n",
       "12268        [英语]  None       None      [加拿大]         None   \n",
       "12395        [英语]  None       None      [加拿大]      [120分钟]   \n",
       "13124        [日语]   7.1        488       [日本]       [18分钟]   \n",
       "13549     [汉语普通话]   6.6       2374       [中国]         None   \n",
       "16084        [英语]   9.2        219       [美国]         None   \n",
       "16475        [英语]   7.8         88       [美国]         None   \n",
       "17001     [塞尔维亚语]   7.8         53     [南斯拉夫]       [98分钟]   \n",
       "17538   [English]   5.6        484      [USA]         None   \n",
       "18007        [英语]   8.3         54       [英国]         None   \n",
       "18466   [english]   8.5        121      [usa]         None   \n",
       "18677        [英语]   8.1        420       [美国]         None   \n",
       "20417        [英语]   6.1         90       [美国]         None   \n",
       "21279        [英语]   7.3         96  [英国 / 美国]       [90分钟]   \n",
       "...           ...   ...        ...        ...          ...   \n",
       "97942        [法语]  None       None       [法国]         None   \n",
       "99273        [英语]  None       None       [英国]         None   \n",
       "99336        [英语]  None       None      [加拿大]      [113分钟]   \n",
       "99905        [英语]   6.6         40       [美国]         None   \n",
       "100301       [英语]  None       None       [美国]      [171分钟]   \n",
       "100742       [德语]   8.9         37       [德国]         None   \n",
       "101479   [普通话/国语]   7.6         96     [中国大陆]         None   \n",
       "101726   [German]   7.6         44  [Germany]         None   \n",
       "102244      [普通话]   5.7        370     [中国大陆]       [95分钟]   \n",
       "108960     [泰卢固语]  None       None       [印度]      [150分钟]   \n",
       "110769      [伊朗语]  None       None       [伊朗]       [95分钟]   \n",
       "110926    [汉语普通话]  None       None     [中国大陆]         None   \n",
       "111184       [英语]  None       None       [美国]       [90分钟]   \n",
       "113214       [英语]  None       None       [美国]         None   \n",
       "115589  [英语 / 法语]  None       None  [美国 / 法国]       [88分钟]   \n",
       "115608       [英语]  None       None       [美国]         None   \n",
       "115616       [德语]  None       None       [德国]         None   \n",
       "115976       [英语]  None       None       [美国]         None   \n",
       "117259       [英语]   4.3        191       [美国]       [60分钟]   \n",
       "117382       [英语]  None       None       [美国]         None   \n",
       "117467       [英语]   8.5         74       [美国]         None   \n",
       "117471       [日语]   8.7        821       [日本]         None   \n",
       "117535       [法语]   8.0         66       [法国]       [90分钟]   \n",
       "121850       [德语]  None       None       [德国]         None   \n",
       "122203       [德语]  None       None       [德国]         None   \n",
       "123061       [德语]  None       None       [德国]      [180分钟]   \n",
       "124708       [英语]  None       None       [英国]         None   \n",
       "125293  [English]   7.8        150      [USA]       [90分钟]   \n",
       "126492     [意大利语]  None       None      [意大利]         None   \n",
       "126570       [英语]  None       None       [美国]       [94分钟]   \n",
       "\n",
       "                                                    title  \\\n",
       "1263                          心灵音 The Movie 心霊音 THE MOVIE   \n",
       "1546                 等着你回来 I've Been Waiting for You (TV)   \n",
       "4417                                         生保の女 それぞれの事情   \n",
       "7490                                              福禄寿三星报喜   \n",
       "8467                              3丁目のタマ おねがい!モモちゃんを捜して!!   \n",
       "8520         怪物弹珠 THE MOVIE モンスターストライク THE MOVIE はじまりの場所へ   \n",
       "8609                  Bugs Bunny's Looney Christmas Tales   \n",
       "8705                              Daffy Ducks Easter Show   \n",
       "8751                                     时间飞船系列第七部 イタダキマン   \n",
       "8783                    鬼公子 炎魔：炎魔 Demon Prince Enma: Enma   \n",
       "9318          破碎之家的黑暗秘密 \"The Dark Secret of Harvest Home\"   \n",
       "9783                     难逃魔掌 Jesus Christ: Serial Rapist   \n",
       "10171                              男孩电影15 Boys on Film 15   \n",
       "10563                        Torso: The Evelyn Dick Story   \n",
       "11185                                                黔山打拐   \n",
       "11425                                       沿海岸线征友 沿海岸线徵友   \n",
       "11830                                         罗伯特·肯尼迪 RFK   \n",
       "12268                          Liberace: Behind the Music   \n",
       "12395          Scandalous Me: The Jacqueline Susann Story   \n",
       "13124                         海贼王特别篇2路飞的绝技 海贼王SP02  路飞的绝技   \n",
       "13549                                                飞行日志   \n",
       "16084       乔治·卡林：挤在纽约 George Carlin: Jammin' in New York   \n",
       "16475   乔治·卡林：我在新泽西干啥呀？ George Carlin: What Am I Doing...   \n",
       "17001                                  交会点 Sabirni centar   \n",
       "17538                                     奶牛美女 Cow Belles   \n",
       "18007                                 皆大欢喜 As you like it   \n",
       "18466                       MTV 名人死斗 Celebrity Deathmatch   \n",
       "18677                                    乞丐博士 With Honors   \n",
       "20417                                 摩托教父 Missionary Man   \n",
       "21279                                      烈血英伦 Britannic   \n",
       "...                                                   ...   \n",
       "97942                          蒙梭罗夫人 La dame de Monsoreau   \n",
       "99273                                          Richard II   \n",
       "99336                                       回家 Going Back   \n",
       "99905                  比尔·希克斯：启示录 Bill Hicks: Revelations   \n",
       "100301                                            卓雅 Zoya   \n",
       "100742                                晚的形状 Nachtgestalten   \n",
       "101479                                                 微笑   \n",
       "101726        意大利人和他们的甜点 Italiener und andere Süßigkeiten   \n",
       "102244                                           土婆婆PK洋媳妇   \n",
       "108960                        Bujjigaadu: Made in Chennai   \n",
       "110769                               律法之书 The Book of Law   \n",
       "110926                                               万年飘香   \n",
       "111184                                  Return to Rajapur   \n",
       "113214  靓丽五人组：德州拉拉队丑闻 Fab Five: The Texas Cheerleader ...   \n",
       "115589            Meurtres à l'Empire State Building (TV)   \n",
       "115608                                            Dry Run   \n",
       "115616                       Countdown - Die Jagd beginnt   \n",
       "115976                                追杀金手臂 The Disciples   \n",
       "117259                                     污染光 Taintlight   \n",
       "117382                            欲海潮 Poison Ivy(TV 1985)   \n",
       "117467                               披头四外传 Two of Us (TV)   \n",
       "117471                                       忍者小英雄 伊賀野カバ丸   \n",
       "117535       爱，都有一段故事 Il n'y a pas d'amour sans histoires   \n",
       "121850                    Katie Fforde - Festtagsstimmung   \n",
       "122203                          Ich leih mir eine Familie   \n",
       "123061                                      Kennedys Hirn   \n",
       "124708                                  A Breed of Heroes   \n",
       "125293                               妈妈十六岁 mom at sixteen   \n",
       "126492                                 我赦免你 Io ti assolvo   \n",
       "126570                        Bandit: Bandit Goes Country   \n",
       "\n",
       "                            type  year  \n",
       "1263                        [恐怖]  None  \n",
       "1546                    [恐怖, 惊栗]  None  \n",
       "4417                    [剧情, 情色]  None  \n",
       "7490                        [剧情]  None  \n",
       "8467                    [动画, 儿童]  None  \n",
       "8520    [剧情, 动作, 动画, 儿童, 奇幻, 冒险]  None  \n",
       "8609                [喜剧, 动画, 家庭]  None  \n",
       "8705                        [动画]  None  \n",
       "8751                        [动画]  None  \n",
       "8783                        [动画]  None  \n",
       "9318                [剧情, 悬疑, 恐怖]  None  \n",
       "9783                        [犯罪]  None  \n",
       "10171           [喜剧, 同性, 犯罪, 奇幻]  None  \n",
       "10563                       [犯罪]  None  \n",
       "11185                   [剧情, 犯罪]  None  \n",
       "11425                   [剧情, 同性]  None  \n",
       "11830               [剧情, 传记, 历史]  None  \n",
       "12268           [剧情, 同性, 音乐, 传记]  None  \n",
       "12395                   [剧情, 传记]  None  \n",
       "13124               [剧情, 动画, 冒险]  None  \n",
       "13549                       [剧情]  None  \n",
       "16084                  [喜剧, 脱口秀]  None  \n",
       "16475                       [喜剧]  None  \n",
       "17001               [剧情, 喜剧, 奇幻]  None  \n",
       "17538               [喜剧, 爱情, 家庭]  None  \n",
       "18007                       [喜剧]  None  \n",
       "18466               [喜剧, 动作, 动画]  None  \n",
       "18677                   [剧情, 喜剧]  None  \n",
       "20417               [剧情, 动作, 惊悚]  None  \n",
       "21279   [剧情, 动作, 爱情, 历史, 战争, 灾难]  None  \n",
       "...                          ...   ...  \n",
       "97942                   [剧情, 历史]  None  \n",
       "99273                   [剧情, 历史]  None  \n",
       "99336                   [剧情, 战争]  None  \n",
       "99905                       [喜剧]  None  \n",
       "100301                  [剧情, 历史]  None  \n",
       "100742                      [剧情]  None  \n",
       "101479                      [剧情]  None  \n",
       "101726                      [剧情]  None  \n",
       "102244                  [剧情, 喜剧]  None  \n",
       "108960                      [动作]  None  \n",
       "110769                  [剧情, 爱情]  None  \n",
       "110926                      [爱情]  None  \n",
       "111184                  [剧情, 爱情]  None  \n",
       "113214                      [惊悚]  None  \n",
       "115589                      [犯罪]  None  \n",
       "115608                  [剧情, 犯罪]  None  \n",
       "115616                      [犯罪]  None  \n",
       "115976                  [动作, 犯罪]  None  \n",
       "117259                      [喜剧]  None  \n",
       "117382                  [喜剧, 爱情]  None  \n",
       "117467                      [剧情]  None  \n",
       "117471                      [剧情]  None  \n",
       "117535                  [剧情, 儿童]  None  \n",
       "121850                  [剧情, 爱情]  None  \n",
       "122203              [剧情, 喜剧, 爱情]  None  \n",
       "123061                      [惊悚]  None  \n",
       "124708              [剧情, 喜剧, 犯罪]  None  \n",
       "125293                      [剧情]  None  \n",
       "126492                      [犯罪]  None  \n",
       "126570                  [喜剧, 犯罪]  None  \n",
       "\n",
       "[224 rows x 13 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看这一部分的数据\n",
    "df = df_all1.loc[df_all1['year'].isnull() & df_all1['date'].notnull()]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/yeungshikam/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "df['year'] = df['date'].map(lambda x:x[0][:4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    224\n",
       "Name: year, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['year'].isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 原来的部分\n",
    "df0 = df_all1.loc[~(df_all1['year'].isnull() & df_all1['date'].notnull())]\n",
    "df0.shape\n",
    "\n",
    "# 修改的部分 df1 \n",
    "# 两部分重新整合在一起\n",
    "df_all2 = pd.concat([df0,df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(82564, 13)"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对index进行重置\n",
    "df_all2 = df_all2.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 数据格式处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>actor</th>\n",
       "      <th>date</th>\n",
       "      <th>director</th>\n",
       "      <th>id</th>\n",
       "      <th>language</th>\n",
       "      <th>rate</th>\n",
       "      <th>rating_num</th>\n",
       "      <th>region</th>\n",
       "      <th>runtime</th>\n",
       "      <th>title</th>\n",
       "      <th>type</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5c024433f625b7c00bfbe877</td>\n",
       "      <td>[扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...</td>\n",
       "      <td>[2009-04-17(美国)]</td>\n",
       "      <td>[布尔·斯蒂尔斯]</td>\n",
       "      <td>2213591</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.3</td>\n",
       "      <td>155828</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[102 分钟]</td>\n",
       "      <td>重返十七岁 17 Again</td>\n",
       "      <td>[剧情, 喜剧, 爱情]</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5c024433f625b7c00bfbe87f</td>\n",
       "      <td>[狄龙, 张国荣, 周润发, 石天, 关山, 朱宝意, 曾江, 成奎安, 吴孟达, 王正方]</td>\n",
       "      <td>[1987-12-17(香港)]</td>\n",
       "      <td>[吴宇森]</td>\n",
       "      <td>1297862</td>\n",
       "      <td>[粤语 / 汉语普通话 / 英语]</td>\n",
       "      <td>8.2</td>\n",
       "      <td>70082</td>\n",
       "      <td>[香港]</td>\n",
       "      <td>[105 分钟]</td>\n",
       "      <td>英雄本色2</td>\n",
       "      <td>[剧情, 动作, 犯罪]</td>\n",
       "      <td>1987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5c024433f625b7c00bfbe8df</td>\n",
       "      <td>[朱丽叶·比诺什, 贝努特·里格恩特, 弗罗伦斯·派梅尔, 夏洛特·韦里, 海伦·文森特, ...</td>\n",
       "      <td>[1993-09-08(法国), 1993-10-10(波兰)]</td>\n",
       "      <td>[克日什托夫·基耶斯洛夫斯基]</td>\n",
       "      <td>1292048</td>\n",
       "      <td>[法语 / 波兰语 / 罗马尼亚语]</td>\n",
       "      <td>8.5</td>\n",
       "      <td>74953</td>\n",
       "      <td>[法国 / 波兰 / 瑞士]</td>\n",
       "      <td>[98分钟]</td>\n",
       "      <td>蓝白红三部曲之蓝 Trois couleurs: Bleu</td>\n",
       "      <td>[剧情, 爱情, 音乐]</td>\n",
       "      <td>1993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5c024434f625b7c00bfbe974</td>\n",
       "      <td>[威尔·史密斯, 罗莎里奥·道森, 伍迪·哈里森, 巴里·佩珀, 迈克尔·伊雷, 比尔·斯米...</td>\n",
       "      <td>[2008-12-19]</td>\n",
       "      <td>[加布里埃莱·穆奇诺]</td>\n",
       "      <td>2969282</td>\n",
       "      <td>[英语 / 西班牙语]</td>\n",
       "      <td>8.1</td>\n",
       "      <td>95242</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[123 分钟]</td>\n",
       "      <td>七磅 Seven Pounds</td>\n",
       "      <td>[剧情]</td>\n",
       "      <td>2008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5c024434f625b7c00bfbe9ba</td>\n",
       "      <td>[阿萨·巴特菲尔德, 伊娃·格林, 塞缪尔·杰克逊, 朱迪·丹奇, 艾拉·珀内尔, 克里斯·...</td>\n",
       "      <td>[2016-12-02(中国大陆), 2016-09-30(美国)]</td>\n",
       "      <td>[蒂姆·波顿]</td>\n",
       "      <td>7051830</td>\n",
       "      <td>[英语]</td>\n",
       "      <td>7.2</td>\n",
       "      <td>106468</td>\n",
       "      <td>[美国]</td>\n",
       "      <td>[127分钟]</td>\n",
       "      <td>佩小姐的奇幻城堡 Miss Peregrine's Home for Peculiar Ch...</td>\n",
       "      <td>[剧情, 奇幻, 冒险]</td>\n",
       "      <td>2016</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        _id  \\\n",
       "0  5c024433f625b7c00bfbe877   \n",
       "1  5c024433f625b7c00bfbe87f   \n",
       "2  5c024433f625b7c00bfbe8df   \n",
       "3  5c024434f625b7c00bfbe974   \n",
       "4  5c024434f625b7c00bfbe9ba   \n",
       "\n",
       "                                               actor  \\\n",
       "0  [扎克·埃夫隆, 莱斯利·曼恩, 托马斯·列农, 马修·派瑞, Tyler Steelman...   \n",
       "1     [狄龙, 张国荣, 周润发, 石天, 关山, 朱宝意, 曾江, 成奎安, 吴孟达, 王正方]   \n",
       "2  [朱丽叶·比诺什, 贝努特·里格恩特, 弗罗伦斯·派梅尔, 夏洛特·韦里, 海伦·文森特, ...   \n",
       "3  [威尔·史密斯, 罗莎里奥·道森, 伍迪·哈里森, 巴里·佩珀, 迈克尔·伊雷, 比尔·斯米...   \n",
       "4  [阿萨·巴特菲尔德, 伊娃·格林, 塞缪尔·杰克逊, 朱迪·丹奇, 艾拉·珀内尔, 克里斯·...   \n",
       "\n",
       "                                 date         director       id  \\\n",
       "0                    [2009-04-17(美国)]        [布尔·斯蒂尔斯]  2213591   \n",
       "1                    [1987-12-17(香港)]            [吴宇森]  1297862   \n",
       "2    [1993-09-08(法国), 1993-10-10(波兰)]  [克日什托夫·基耶斯洛夫斯基]  1292048   \n",
       "3                        [2008-12-19]      [加布里埃莱·穆奇诺]  2969282   \n",
       "4  [2016-12-02(中国大陆), 2016-09-30(美国)]          [蒂姆·波顿]  7051830   \n",
       "\n",
       "             language rate rating_num          region   runtime  \\\n",
       "0                [英语]  7.3     155828            [美国]  [102 分钟]   \n",
       "1   [粤语 / 汉语普通话 / 英语]  8.2      70082            [香港]  [105 分钟]   \n",
       "2  [法语 / 波兰语 / 罗马尼亚语]  8.5      74953  [法国 / 波兰 / 瑞士]    [98分钟]   \n",
       "3         [英语 / 西班牙语]  8.1      95242            [美国]  [123 分钟]   \n",
       "4                [英语]  7.2     106468            [美国]   [127分钟]   \n",
       "\n",
       "                                               title          type  year  \n",
       "0                                     重返十七岁 17 Again  [剧情, 喜剧, 爱情]  2009  \n",
       "1                                              英雄本色2  [剧情, 动作, 犯罪]  1987  \n",
       "2                      蓝白红三部曲之蓝 Trois couleurs: Bleu  [剧情, 爱情, 音乐]  1993  \n",
       "3                                    七磅 Seven Pounds          [剧情]  2008  \n",
       "4  佩小姐的奇幻城堡 Miss Peregrine's Home for Peculiar Ch...  [剧情, 奇幻, 冒险]  2016  "
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_all2.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 语言 language"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [],
   "source": [
    "# [粤语 / 汉语普通话 / 英语] 转为 ['粤语','汉语普通话','英语']\n",
    "df_all2['language'] = df_all2['language'].map(lambda x:\"\".join(x).split(\"/\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 再去掉list中每个字符串的前后空格\n",
    "df_all2['language'] = df_all2['language'].map(lambda x :[i.strip() for i in x])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 地区 region"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 同语言\n",
    "df_all2['region'] = df_all2['region'].map(lambda x:\"\".join(x).split(\"/\"))\n",
    "df_all2['region'] = df_all2['region'].map(lambda x :[i.strip() for i in x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "region_set = set()\n",
    "for each in df_all2['region']:\n",
    "    for i in each:\n",
    "        region_set.add(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'(Canada)',\n",
       " '(Indonesia)',\n",
       " '(Spain)',\n",
       " '(Turkey)',\n",
       " '1958-06-29',\n",
       " 'ATV',\n",
       " 'Afghanistan',\n",
       " 'Albania',\n",
       " 'Algeria',\n",
       " 'America',\n",
       " 'American',\n",
       " 'Angola',\n",
       " 'Argentina',\n",
       " 'Argentina)',\n",
       " 'Armania',\n",
       " 'Armenia',\n",
       " 'Aruba',\n",
       " 'Australia',\n",
       " 'Austria',\n",
       " 'BBC',\n",
       " 'Bahamas',\n",
       " 'Bangladesh',\n",
       " 'Belarus',\n",
       " 'Belgium',\n",
       " 'Benin',\n",
       " 'Bhutan',\n",
       " 'Bosnia and Herzegovina',\n",
       " 'Brasil',\n",
       " 'Brazil',\n",
       " 'Bulgaria',\n",
       " 'Burkina Faso',\n",
       " 'CANADA',\n",
       " 'Cambodia',\n",
       " 'Cameroon',\n",
       " 'Canada',\n",
       " 'Chile',\n",
       " 'China',\n",
       " 'Colombia',\n",
       " 'Costa Rica',\n",
       " 'Croatia',\n",
       " 'Cuba',\n",
       " 'Cyprus',\n",
       " 'Czech Republic',\n",
       " 'Czech Republic | Canada',\n",
       " 'Czechoslovakia',\n",
       " 'Denmark',\n",
       " 'Dominica',\n",
       " 'Dominican Republic',\n",
       " 'East Germany',\n",
       " 'Egypt',\n",
       " 'English',\n",
       " 'Estonia',\n",
       " 'Federal Republic of Yugoslavia',\n",
       " 'Finland',\n",
       " 'France',\n",
       " 'French',\n",
       " 'Georgia',\n",
       " 'Germany',\n",
       " 'Germany Germany',\n",
       " 'Ghana',\n",
       " 'Greece',\n",
       " 'Greenland',\n",
       " 'Guatemala',\n",
       " 'Guinea',\n",
       " 'Hungary',\n",
       " 'Iceland',\n",
       " 'India',\n",
       " 'Indonesia',\n",
       " 'Iran',\n",
       " 'Ireland',\n",
       " 'Israel',\n",
       " 'Italy',\n",
       " 'Jamaica',\n",
       " 'Japan',\n",
       " 'Kazakhstan',\n",
       " 'Kenya',\n",
       " 'Kosovo',\n",
       " 'Latvia',\n",
       " 'Lebanon',\n",
       " 'Liechtenstein',\n",
       " 'Lithuania',\n",
       " 'Luxembourg',\n",
       " 'Malaysia',\n",
       " 'Mali',\n",
       " 'Malta',\n",
       " 'Martinique',\n",
       " 'Mauritania',\n",
       " 'Mexico',\n",
       " 'Mexico | Canada',\n",
       " 'Moldova',\n",
       " 'Monaco',\n",
       " 'Mongolia',\n",
       " 'Morocco',\n",
       " 'Namibia',\n",
       " 'Netherlands',\n",
       " 'Netherlands Antilles',\n",
       " 'New Zealand',\n",
       " 'Nigeria',\n",
       " 'North Korea',\n",
       " 'North Vietnam',\n",
       " 'Norway',\n",
       " 'Pakistan',\n",
       " 'Palestine',\n",
       " 'Panama',\n",
       " 'Peru',\n",
       " 'Peru | Argentina',\n",
       " 'Philippines',\n",
       " 'Poland',\n",
       " 'Poland | Czech Republic',\n",
       " 'Portugal',\n",
       " 'Puerto Rico',\n",
       " 'Republic of Macedonia',\n",
       " 'Romania',\n",
       " 'Russia',\n",
       " 'Russia, Soviet Union',\n",
       " 'Russian',\n",
       " 'Russion',\n",
       " 'Senegal',\n",
       " 'Serbia',\n",
       " 'Serbia and Montenegro',\n",
       " 'Serbia)',\n",
       " 'Slovakia',\n",
       " 'Slovenia',\n",
       " 'South Africa',\n",
       " 'South Korea',\n",
       " 'Soviet Union',\n",
       " 'Spain',\n",
       " 'Spain | Switzerland',\n",
       " 'Sri Lanka',\n",
       " 'Sweden',\n",
       " 'Switzerland',\n",
       " 'Syria',\n",
       " 'TNT电视网',\n",
       " 'Tanzania',\n",
       " 'Thailand',\n",
       " 'Thailand | Hong Kong',\n",
       " 'Trinidad and Tobago',\n",
       " 'Tunisia',\n",
       " 'Turkey',\n",
       " 'U.S.A',\n",
       " 'UK',\n",
       " 'US',\n",
       " 'USA',\n",
       " 'USA | Germany | Mexico',\n",
       " 'Ukraine',\n",
       " 'United Arab Emirates',\n",
       " 'United States USA',\n",
       " 'Uruguay',\n",
       " 'Uzbekistan',\n",
       " 'Venezuela',\n",
       " 'Vietnam',\n",
       " 'West Germany',\n",
       " 'Yugoslavia',\n",
       " 'Zambia',\n",
       " 'Zimbabwe',\n",
       " 'brazil',\n",
       " 'denmark',\n",
       " 'india',\n",
       " 'indonesia',\n",
       " 'philippines',\n",
       " 'purtugal',\n",
       " 'sweden',\n",
       " 'uk',\n",
       " 'usa',\n",
       " '不丹',\n",
       " '东德',\n",
       " '东德 East Germany',\n",
       " '中国',\n",
       " '中国台湾',\n",
       " '中国大陆',\n",
       " '中国香港',\n",
       " '中国（杭州）',\n",
       " '丹麦',\n",
       " '丹麦  Denmark',\n",
       " '丹麦 Danmark',\n",
       " '丹麦 Denmark',\n",
       " '丹麦 Kongeriget Danmark',\n",
       " '乌克兰',\n",
       " '乌克兰 Ukraine',\n",
       " '乌兹别克斯坦',\n",
       " '乌兹别克斯坦共和国',\n",
       " '乌拉圭',\n",
       " '乌拉圭Uruguay',\n",
       " '乍得',\n",
       " '也门',\n",
       " '亚美尼亚',\n",
       " '以色列',\n",
       " '以色列 Israel',\n",
       " '以色列Israel',\n",
       " '伊拉克',\n",
       " '伊拉克 Iraq',\n",
       " '伊拉克 比利时 Iraq Belgium',\n",
       " '伊朗',\n",
       " '伊朗 Iran',\n",
       " '伯利兹',\n",
       " '佛得角',\n",
       " '俄国',\n",
       " '俄国 Russia',\n",
       " '俄罗斯',\n",
       " '俄罗斯 Russia',\n",
       " '俄罗斯 Russian',\n",
       " '俄罗斯 russia',\n",
       " '俄罗斯Russia',\n",
       " '俄羅斯',\n",
       " '俄语',\n",
       " '保加利亚',\n",
       " '保加利亚 Bulgaria',\n",
       " '克罗地亚',\n",
       " '克罗地亚共和国',\n",
       " '克罗地亚语',\n",
       " '关岛',\n",
       " '冰岛',\n",
       " '冰岛 Iceland',\n",
       " '冰島 Iceland',\n",
       " '列支敦士登',\n",
       " '刚果',\n",
       " '利比亚',\n",
       " '利比里亚',\n",
       " '前南斯拉夫',\n",
       " '前捷克斯洛伐克',\n",
       " '前苏联',\n",
       " '前苏联  Soviet Union',\n",
       " '前苏联 Soviet Union',\n",
       " '前西德',\n",
       " '前西德  意大利  法国',\n",
       " '加拿大',\n",
       " '加拿大  Canada',\n",
       " '加拿大 Canada',\n",
       " '加拿大Canada',\n",
       " '加泰罗尼亚',\n",
       " '加纳',\n",
       " '加蓬',\n",
       " '匈牙利',\n",
       " '匈牙利  Hungary',\n",
       " '匈牙利 Hungarian',\n",
       " '匈牙利 Hungary',\n",
       " '匈牙利語',\n",
       " '北印度 Hindi',\n",
       " '北朝鮮',\n",
       " '北朝鲜',\n",
       " '南亚',\n",
       " '南斯拉夫',\n",
       " '南斯拉夫 Yugoslavia',\n",
       " '南斯拉夫联盟共和国',\n",
       " '南斯拉夫联邦共和国',\n",
       " '南联盟',\n",
       " '南非',\n",
       " '南非 South Africa',\n",
       " '博茨瓦纳',\n",
       " '卡塔尔',\n",
       " '卡達',\n",
       " '卢森堡',\n",
       " '卢森堡 Luxembourg',\n",
       " '印尼',\n",
       " '印尼 Indonesia',\n",
       " '印度',\n",
       " '印度  India',\n",
       " '印度  Indian',\n",
       " '印度 Idian',\n",
       " '印度 India',\n",
       " '印度 Indian',\n",
       " '印度 indai',\n",
       " '印度 india',\n",
       " '印度India',\n",
       " '印度indian',\n",
       " '印度尼西亚',\n",
       " '印度尼西亚  Indonesia',\n",
       " '印度尼西亚 Indonesia',\n",
       " '印度尼西亞',\n",
       " '危地马拉',\n",
       " '厄瓜多尔',\n",
       " '厄瓜多尔 Ecuador',\n",
       " '原西德',\n",
       " '原西德  奥地利',\n",
       " '叙利亚',\n",
       " '古巴',\n",
       " '古巴 Cuba',\n",
       " '古巴Cuba',\n",
       " '台湾',\n",
       " '吉尔吉斯斯坦',\n",
       " '哈萨克斯坦',\n",
       " '哈萨克斯坦 Kazakhstan',\n",
       " '哈薩克斯坦',\n",
       " '哥伦比亚',\n",
       " '哥伦比亚 Colombia',\n",
       " '哥斯达黎加',\n",
       " '哥斯达黎加 Costa Rica',\n",
       " '喀麦隆',\n",
       " '土耳其',\n",
       " '土耳其 Turkey',\n",
       " '土耳其Turkey',\n",
       " '埃及',\n",
       " '埃及 Egipt',\n",
       " '埃塞俄比亚',\n",
       " '埃塞俄比亚 Ethiopia',\n",
       " '埃塞尔比亚',\n",
       " '塔吉克斯坦',\n",
       " '塞内加尔',\n",
       " '塞内加尔 Senegal',\n",
       " '塞尔维亚',\n",
       " '塞尔维亚 Serbia',\n",
       " '塞尔维亚Serbia',\n",
       " '塞尔维亚和孟的内哥',\n",
       " '塞尔维亚和黑山',\n",
       " '塞浦路斯',\n",
       " '塞爾維亞',\n",
       " '塞黑',\n",
       " '墨西哥',\n",
       " '墨西哥  Mexico',\n",
       " '墨西哥 Mexico',\n",
       " '墨西哥Mexico',\n",
       " '多米尼亚',\n",
       " '多米尼加',\n",
       " '多米尼加共和国',\n",
       " '奥地利',\n",
       " '奥地利  德国  日本',\n",
       " '奥地利 Austria',\n",
       " '奧地利',\n",
       " '委內瑞拉',\n",
       " '委内瑞拉',\n",
       " '委内瑞拉 Venezuela',\n",
       " '孟加拉',\n",
       " '孟加拉共和国',\n",
       " '孟加拉国',\n",
       " '安哥拉',\n",
       " '安哥拉 Angola',\n",
       " '尼加拉瓜',\n",
       " '尼日利亚',\n",
       " '尼日利亚 Nigeria',\n",
       " '尼日尔',\n",
       " '尼泊尔',\n",
       " '尼泊尔 Nepal',\n",
       " '巴勒斯坦',\n",
       " '巴勒斯坦被占领区',\n",
       " '巴哈马',\n",
       " '巴哈马 Bahamas',\n",
       " '巴基斯坦',\n",
       " '巴基斯坦 Pakistan',\n",
       " '巴基斯坦Pakistan',\n",
       " '巴布亚新几内亚',\n",
       " '巴拉圭',\n",
       " '巴拿马',\n",
       " '巴拿马Panama',\n",
       " '巴西',\n",
       " '巴西 Brazil',\n",
       " '巴西Brazil',\n",
       " '巴貝多 Barbados',\n",
       " '布基纳法索',\n",
       " '布基那法索',\n",
       " '希腊',\n",
       " '希腊  Greece',\n",
       " '希腊 Greece',\n",
       " '希腊 Greek',\n",
       " '希腊，西班牙',\n",
       " '幾內亞比索',\n",
       " '开曼群岛',\n",
       " '德国',\n",
       " '德意志联邦共和国',\n",
       " '德语',\n",
       " '意大利',\n",
       " '意大利 Italy  法国 France',\n",
       " '愛爾蘭、荷蘭、匈牙利',\n",
       " '拉脱维亚',\n",
       " '拉脱维亚 Latvia',\n",
       " '挪威',\n",
       " '挪威 Norway',\n",
       " '挪威语',\n",
       " '捷克',\n",
       " '捷克 Czech',\n",
       " '捷克 Czech Republic',\n",
       " '捷克共和国',\n",
       " '捷克斯洛伐克',\n",
       " '捷克斯洛伐克 Czechoslovakia',\n",
       " '捷克語',\n",
       " '摩尔多瓦',\n",
       " '摩洛哥',\n",
       " '摩洛哥 Morocco',\n",
       " '摩爾多瓦',\n",
       " '摩纳哥',\n",
       " '摩纳哥 Monaco',\n",
       " '文莱',\n",
       " '斐济',\n",
       " '斯洛伐克',\n",
       " '斯洛伐克 Slovakia',\n",
       " '斯洛文尼亚',\n",
       " '斯洛文尼亚 Slovenian',\n",
       " '斯诺文尼亚',\n",
       " '斯里兰卡',\n",
       " '新加坡',\n",
       " '新加坡 Singapore',\n",
       " '新加坡 singapore',\n",
       " '新家坡',\n",
       " '新西兰',\n",
       " '新西兰 Netherlands',\n",
       " '新西兰 New Zealand',\n",
       " '日本',\n",
       " '智利',\n",
       " '智利 Chile',\n",
       " '智利 | 阿根廷',\n",
       " '朝鲜',\n",
       " '朝鲜 North Korea',\n",
       " '朝鲜民主主义人民共和国',\n",
       " '柬埔寨',\n",
       " '柬埔寨 Cambodia',\n",
       " '格陵兰',\n",
       " '格陵兰岛',\n",
       " '格魯吉亞',\n",
       " '格鲁吉亚',\n",
       " '格鲁吉亚 Georgia',\n",
       " '比利时',\n",
       " '比利时  Belgium',\n",
       " '比利时 Belgique',\n",
       " '比利时 Belgium',\n",
       " '比利時',\n",
       " '毛利塔尼亚',\n",
       " '毛里塔尼亚',\n",
       " '沙特阿拉伯',\n",
       " '法国',\n",
       " '法国  荷兰',\n",
       " '法國 France',\n",
       " '法罗群岛',\n",
       " '波兰',\n",
       " '波兰  Poland',\n",
       " '波兰 Poland',\n",
       " '波多黎各',\n",
       " '波多黎各 Pueto Rico',\n",
       " '波斯尼亚',\n",
       " '波斯尼亚及黑塞哥维那',\n",
       " '波斯尼亚和黑塞哥维那',\n",
       " '波蘭',\n",
       " '波黑',\n",
       " '泰国',\n",
       " '泰國',\n",
       " '洪都拉斯',\n",
       " '洪都拉斯 Honduras',\n",
       " '海地',\n",
       " '澳大利亚',\n",
       " '澳大利亚 Australia',\n",
       " '澳大利亚 Austrlia',\n",
       " '澳大利亚 australia',\n",
       " '澳大利亚Australia',\n",
       " '澳大利亞',\n",
       " '澳洲 Australia',\n",
       " '澳門',\n",
       " '澳门',\n",
       " '烏克蘭',\n",
       " '爱尔兰',\n",
       " '爱尔兰 Ireland',\n",
       " '爱沙尼亚',\n",
       " '爱沙尼亚共和国 Estonia',\n",
       " '牙买加',\n",
       " '牙买加 Jamaica',\n",
       " '特立尼达和多巴哥',\n",
       " '玻利维亚',\n",
       " '玻利维亚bolivia',\n",
       " '瑞典',\n",
       " '瑞典  Sweden',\n",
       " '瑞典 Sweden',\n",
       " '瑞典Sweden',\n",
       " '瑞士',\n",
       " '瑞士  Switzerland',\n",
       " '瑞士 Switzerland',\n",
       " '瓦努阿图',\n",
       " '白俄罗斯',\n",
       " '科威特',\n",
       " '科威特 Kuwait',\n",
       " '科特迪瓦',\n",
       " '科索沃',\n",
       " '科索沃 Kosovo',\n",
       " '秘鲁',\n",
       " '秘鲁 Peru',\n",
       " '突尼斯',\n",
       " '突尼斯Tunisia',\n",
       " '立陶宛',\n",
       " '約旦 Jordan',\n",
       " '索马里',\n",
       " '约旦',\n",
       " '缅甸',\n",
       " '罗马利亚',\n",
       " '罗马尼亚',\n",
       " '罗马尼亚 Romania',\n",
       " '美国',\n",
       " '美国 USA（ABC电视网）',\n",
       " '美國',\n",
       " '美國 USA',\n",
       " '老挝',\n",
       " '联邦德国',\n",
       " '肯尼亚',\n",
       " '芬兰',\n",
       " '芬兰  Finland',\n",
       " '芬兰 Finland',\n",
       " '芬蘭 Finland',\n",
       " '苏丹',\n",
       " '苏联',\n",
       " '苏联  保加利亚',\n",
       " '苏联 CCCP',\n",
       " '苏联 Soviet Union',\n",
       " '苏联 USSR',\n",
       " '英国',\n",
       " '英国  原西德',\n",
       " '英国  美国',\n",
       " '英国  西班牙',\n",
       " '英国BBC',\n",
       " '英国属地曼岛',\n",
       " '英國 UK',\n",
       " '荷兰',\n",
       " '荷兰 Netherland',\n",
       " '荷兰 Netherlands',\n",
       " '荷兰Netherlands',\n",
       " '荷蘭 Netherlands',\n",
       " '莫三比克',\n",
       " '莫桑比克',\n",
       " '菲律宾',\n",
       " '菲律宾 Philipine',\n",
       " '菲律宾 Philipines',\n",
       " '菲律宾 Philippine',\n",
       " '菲律宾 Philippines',\n",
       " '菲律宾Philippine',\n",
       " '萨摩亚',\n",
       " '葡萄牙',\n",
       " '葡萄牙 Portugal',\n",
       " '蒙古',\n",
       " '蒙古国',\n",
       " '蘇聯',\n",
       " '西德',\n",
       " '西德 West Germany',\n",
       " '西班牙',\n",
       " '西班牙  德国',\n",
       " '西班牙 SPain',\n",
       " '赤道几内亚',\n",
       " '越南',\n",
       " '越南 Vietnam',\n",
       " '越南Vietnam',\n",
       " '阿塞拜疆',\n",
       " '阿塞拜疆 Azerbaijan',\n",
       " '阿富汗',\n",
       " '阿尔及利亚',\n",
       " '阿尔巴尼亚',\n",
       " '阿尔巴尼亚 Albania',\n",
       " '阿拉伯联合酋长国',\n",
       " '阿根廷',\n",
       " '阿根廷  Argentina',\n",
       " '阿根廷 Argentina',\n",
       " '阿联酋',\n",
       " '阿鲁巴',\n",
       " '阿鲁巴岛',\n",
       " '韩国',\n",
       " '香港',\n",
       " '马其顿',\n",
       " '马其顿共和国',\n",
       " '马拉西亚',\n",
       " '马提尼克岛',\n",
       " '马来西亚',\n",
       " '马来西亚  Malaysia',\n",
       " '马来西亚 Malaysia',\n",
       " '马耳他',\n",
       " '马里',\n",
       " '魁北克',\n",
       " '黎巴嫩',\n",
       " '黎巴嫩  Lebanon',\n",
       " '黑山'}"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display region_set so the different spellings used for the same region can be inspected.\n",
    "region_set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The set printed above shows that one country can be written in several ways.\n",
    "# Labels that mix Chinese and English are reduced to Chinese only, and\n",
    "# Traditional Chinese is converted to Simplified Chinese.\n",
    "def ZhEn2Zh(x):\n",
    "    \"\"\"Normalize one region label.\n",
    "\n",
    "    If the label starts with a character in the range 一-龥, every character\n",
    "    outside that range (Latin letters, spaces, punctuation) is removed. Labels\n",
    "    that start with anything else skip this step. The result is then passed\n",
    "    through langconv's 'zh-hans' converter and returned.\n",
    "    \"\"\"\n",
    "    starts_with_chinese = re.match(\"[一-龥]+\", x) is not None\n",
    "    chinese_only = re.sub(\"[^一-龥]+\", \"\", x) if starts_with_chinese else x\n",
    "    # Traditional -> Simplified conversion\n",
    "    return langconv.Converter('zh-hans').convert(chinese_only)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run ZhEn2Zh over each label in every row's list of regions.\n",
    "df_all2['region'] = df_all2['region'].map(lambda labels: list(map(ZhEn2Zh, labels)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对于英文的表达，选一些主流地区进行转换\n",
    "region_dict = {\"America\":\"美国\",\"American\":\"美国\",\"US\":\"美国\",\"U.S.A\":\"美国\",\"USA\":\"美国\",\"usa\":\"美国\",\n",
    "               \"United States USA\":\"美国\",\"Argentina\":\"阿根廷\",\"Argentina)\":\"阿根廷\",\"Mexico\":\"墨西哥\",\n",
    "               \"Canada\":\"加拿大\",\"CANADA\":\"加拿大\",\"(Canada)\":\"加拿大\",\"Brazil\":\"巴西\",\"Brasil\":\"巴西\",\n",
    "               \"BBC\":\"英国\",\"UK\":\"英国\",\"uk\":\"英国\",\n",
    "               \"Australia\":\"澳大利亚\",\"Austria\":\"澳大利亚\",                             \n",
    "               \"china\":\"中国\",\"China\":\"中国\",\"中国杭州\":\"中国\",\n",
    "               \"India\":\"印度\",\"india\":\"印度\",\"Japan\":\"日本\",\"South Korea\":\"韩国\",\n",
    "               \"(Spain)\":\"西班牙\",\"Spain\":\"西班牙\",\"Germany\":\"德国\",\"Germany Germany\":\"德国\",\n",
    "               \"Sweden\":\"瑞典\",\"sweden\":\"瑞典\",\"France\":\"法国\",\"Franch\":\"法国\",\"Italy\":\"意大利\",\n",
    "               \"Russia\":\"俄罗斯\",\"Russian\":\"俄罗斯\",\"Russion\":\"俄罗斯\",\n",
    "               \"俄国\":\"俄罗斯\",\"俄语\":\"俄罗斯\",\"Soviet Union\":\"苏联\",\n",
    "               \"印尼\":\"印度尼西亚\",\"(Indonesia)\":\"印度尼西亚\",\"Indonesia\":\"印度尼西亚\",\"indonesia\":\"印度尼西亚\"}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "def region_update(x):\n",
    "    \"\"\"Return the region_dict entry for label x if there is one, otherwise x itself.\"\"\"\n",
    "    return region_dict.get(x, x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace each label in every row's region list with its region_dict entry when one exists.\n",
    "df_all2['region'] = df_all2['region'].map(lambda labels: list(map(region_update, labels)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 片长 runtime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _first_number(values):\n",
    "    \"\"\"Return the first run of digits in values[0] as a string, or None.\n",
    "\n",
    "    None is returned when `values` is empty/falsy or when values[0] contains\n",
    "    no digits at all (the earlier findall(...)[0] form raised IndexError then).\n",
    "    Assumes each cell is a list-like whose first element is the runtime text\n",
    "    -- TODO confirm against the scraped data.\n",
    "    \"\"\"\n",
    "    if not values:\n",
    "        return None\n",
    "    # Raw string so that \\d is a regex digit class, not a string escape.\n",
    "    found = re.search(r\"\\d+\", values[0])\n",
    "    return found.group(0) if found else None\n",
    "\n",
    "\n",
    "# Keep only the leading number of each runtime entry (still a string at this point).\n",
    "df_all2['runtime'] = df_all2['runtime'].map(_first_number)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    102\n",
       "1    105\n",
       "2     98\n",
       "3    123\n",
       "4    127\n",
       "Name: runtime, dtype: object"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the extracted runtime values (strings, so dtype is object).\n",
    "df_all2['runtime'].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 评分 rate、评分人数 rating_num、年份 year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the rating, rating-count and year columns to numeric dtypes.\n",
    "for _column, _dtype in (('rate', float), ('rating_num', float), ('year', int)):\n",
    "    df_all2[_column] = df_all2[_column].astype(_dtype)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 筛选掉数据爬取时未上映的电影"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(82564, 13)"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row and column count of df_all2, shown for comparison with the filtered frame.\n",
    "df_all2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows whose year is not 2019 or later.\n",
    "df_all3 = df_all2.loc[~df_all2['year'].ge(2019)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(82287, 13)"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row and column count of df_all3, i.e. after the year filter.\n",
    "df_all3.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Write the filtered frame to CSV without the index column.\n",
    "# NOTE(review): 'moive' in the file name looks like a typo for 'movie'; it is kept\n",
    "# unchanged because other code may read this exact path.\n",
    "df_all3.to_csv(path_or_buf=\"douban-world-moive-data.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "12px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": true,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
